## 

rm(list = ls())

## 

library(rdrobust)
library(tidyverse)
library(pbapply)

## Declare covars

covs <- c('turnout_party_2009_btw', 'soz_vers_beschaeftigte_share',
          'pop_density_km2', 'migration_out_share')

## Get state election date ##

lt <- readRDS('data/data_state.rds') %>%
  filter(!is.na(treated)) %>%
  mutate(year = lubridate::year(date)) %>%
  mutate(state_id = substr(ags, 1, 2))

## Get list of states to use

state_list <- read_rds("data/states_census.rds") %>%
  dplyr::select(applies_census, state_id, census_first_year)

## Merge to main 

lt <- left_join(lt, state_list)

## List of outcomes

outcomes <- c("afd", "cdu_csu", "fdp", "greens", "left", "other_party", 
              "spd", "turnout", "left_total", "right_total",
              'agg_left', 'agg_right', 'agg_center', 'incumbent')

## Source helper function to tidy rdrobust output

source("code/tidy_rd.R")

#### Convert years to periods rel. to treatment ####

table(lt$state_id, lt$year)
lt <- lt %>%
  mutate(time_rel = year - census_first_year)

## This converts each election year to time period relative to treatment
## Errors can be ignored

periods_by_state <- lt %>%
  group_by(state_id) %>%
  summarise(l = list(unique(time_rel))) %>%
  unnest(cols = c(l))
periods_by_state <- periods_by_state %>%
  group_by(state_id) %>%
  summarise(l_r = list(rank(l))) %>%
  unnest(cols = c(l_r)) %>% dplyr::select(-state_id) %>% 
  bind_cols(periods_by_state, .)
periods_by_state <- periods_by_state %>% 
  group_by(state_id) %>%
  mutate(l_r = l_r - max(l_r)) %>%
  filter(!is.na(l)) %>%
  rename(time_rel_period = l_r,
         time_rel = l)

## Merge this back to the main DF

lt <- lt %>%
  left_join(periods_by_state) %>%
  mutate(post = ifelse(time_rel_period > -1, 1, 0),
         treated_post = post * treated)

lt %>% 
  distinct(land,year, census_first_year, post)

## Drop period t-2 (i.e. we only want two periods)

lt <- lt %>% 
  filter(time_rel_period > -2)

## Get first differences

diff_df <- pblapply(outcomes, function(o) {
  out <- lt %>%
    filter(time_rel_period > -2) %>%
    mutate(time_rel_period = time_rel_period + 2)  %>% 
    pivot_wider(values_from = o, names_from = 'time_rel_period', 
                id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2  - o1) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(lt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs)) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1)

# Exclude S-H

diff_df <- diff_df %>%
  filter(!str_sub(ags, 1, 2) == '01')

## Estimate
## This loops over outcomes

out_rd_opt <- pblapply(outcomes, function(o) {
  
  ## Iterate over whether or not to exclude B-W
  
  lapply(c(T,F), function(exclude_state) {
    
    if (exclude_state) { 
      subset_select <- !(substr(diff_df$ags, 1, 2) == '08') 
    } else {
      subset_select <- rep(T, nrow(diff_df))
    }
    
    ## Estimate
    
    out <- rdrobust(y = diff_df[subset_select, o] %>% pull(!!o), 
                    x = diff_df$runvar[subset_select], c = 0,
                    covs = diff_df[subset_select, covs])
    
    ## Return this : Robust B-C SE
    out2 <- out %>% 
      tidy_rd(se_nr = 3) %>% 
      mutate(outcome = o, 
             exclude_state = ifelse(exclude_state, 
                                    'State excluded', 'All states')) %>% 
      dplyr::rename(se = std.error, pval = p.value, 
                    bw = bw_left_h,
                    bw_bias = bw_left_b) %>% 
      mutate(conf.low90 = estimate - qnorm(0.95)*se,
             conf.high90 = estimate + qnorm(0.95)*se)
    
    ## Return
    
    out2
    
  }) %>% reduce(rbind) 
}) %>%
  reduce(rbind)

## Save result

write_rds(out_rd_opt, 'data/rdd_state_results.rds')

